only ILC2 for topic modeling
source("../../../analysis.10x.r")
GEX.seur <- readRDS("adv_1110.GEX.seur_ILC2.Anno.rds")
GEX.seur
## An object of class Seurat
## 15083 features across 9076 samples within 1 assay
## Active assay: RNA (15083 features, 800 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
pumap.2 <- DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "SP.info",
cols = c("#5A5C5F","#0000C8","#FDB911"))+ labs(title="")+ theme(aspect.ratio = 0.75)
pumap.1 <- DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") + NoLegend()+ labs(title="")+ theme(aspect.ratio = 0.75)
pumap.2 + pumap.1
pumap.3 <- DimPlot(GEX.seur, group.by = "SP.info", split.by = "SP.info",cols = c("#5A5C5F","#0000C8","#FDB911"), ncol = 3)+ theme(aspect.ratio = 0.75) +
labs(title = "") + NoLegend()
pumap.3
pumap.4 <- DimPlot(GEX.seur, group.by = "SP.info", split.by = "SP.info",cols = c("#5A5C5F","#0000C8","#FDB911"), ncol = 1)+
labs(title = "") +
theme(aspect.ratio = 0.75) +
NoLegend()
pumap.4
basically follow https://bitbucket.org/jerry00/mouse_small_intestine_immune_cell_atlas/src/master/script/topicModeling_PP.R
source("topicmodel.r")
# using umap coordinates
GEX.seur@reductions[['umap']]@cell.embeddings[1:5,1:2]
## UMAP_1 UMAP_2
## AAACCCAAGCATCCTA-1 -1.1394443 3.0657358
## AAACCCAAGGGCAACT-1 -3.0438967 -0.2278638
## AAACCCAAGGTGCCAA-1 -3.9796829 -1.0369316
## AAACCCACAGAAGTGC-1 -0.7091646 -1.2534755
## AAACCCAGTAGGGTAC-1 4.3127932 1.5042106
# to exclude ribosomal
ribo.loc <- grepl("Rpl|Rps", rownames(GEX.seur@assays[['RNA']]@counts))
head(rownames(GEX.seur@assays[['RNA']]@counts)[ribo.loc])
## [1] "Rpl7" "Rpl31" "Rpl37a" "Rps6kc1" "Rpl7a" "Rpl12"
sum(ribo.loc)
## [1] 96
# Build topic models and generate assocaited figures
# Recommended to build models for k = 3,4,5,6,7,8,9,10 and tolerance 0.1 (set as inputs to this function)
# each K would take hours, bigger K, longer time
# so slow, would run into three sub-scripts from 3 to 10
#for(kk in c(3,4,5,6,7,8,9,10)){
# kkk <- kk
# if(kk %in% c(4,6,8)){
# kkk <- paste0(0,kk)
# }
# FitGoM(t(as.matrix(GEX.seur@assays[['RNA']]@counts)[!ribo.loc,]),
# K = kk, tol = 0.1,
# path_rda =paste0("topicmodel/test_topic.n",kkk,"_t0.1.rda"))
#}
# after topic models are done, ranged K,
# here could directly load the saved object
# the default var is 'Topic_clus'
load("topicmodel/test_topic.n08_t0.1.rda")
omega <- as.data.frame(Topic_clus$omega)
colnames(omega) <- paste0("Topic_", colnames(omega))
GEX.seur <- AddMetaData(GEX.seur, omega)
head(GEX.seur@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt FB.info
## AAACCCAAGCATCCTA-1 ILC2_GEX 13805 3251 3.940601 hashtag6
## AAACCCAAGGGCAACT-1 ILC2_GEX 5200 2039 3.518554 hashtag5
## AAACCCAAGGTGCCAA-1 ILC2_GEX 4848 1503 3.361518 hashtag1
## AAACCCACAGAAGTGC-1 ILC2_GEX 5665 1673 4.165931 hashtag3
## AAACCCAGTAGGGTAC-1 ILC2_GEX 3458 1574 1.098583 hashtag5
## AAACCCATCGGTCGAC-1 ILC2_GEX 3298 1169 4.093390 hashtag6
## S.Score G2M.Score Phase DoubletFinder0.05
## AAACCCAAGCATCCTA-1 -0.08767960 -0.3449294 G1 Singlet
## AAACCCAAGGGCAACT-1 -0.04374438 -0.1981341 G1 Singlet
## AAACCCAAGGTGCCAA-1 -0.03750102 -0.1720957 G1 Singlet
## AAACCCACAGAAGTGC-1 -0.11390714 -0.2737016 G1 Singlet
## AAACCCAGTAGGGTAC-1 -0.03319338 -0.1208448 G1 Singlet
## AAACCCATCGGTCGAC-1 -0.05704899 -0.2030185 G1 Singlet
## DoubletFinder0.1 preAnno_1 preAnno_2 SP.info
## AAACCCAAGCATCCTA-1 Singlet ILC2 ILC2_4 IL-33_DA
## AAACCCAAGGGCAACT-1 Singlet ILC2 ILC2_3 IL-33
## AAACCCAAGGTGCCAA-1 Singlet ILC2 ILC2_3 PBS
## AAACCCACAGAAGTGC-1 Singlet ILC2 ILC2_1 IL-33
## AAACCCAGTAGGGTAC-1 Singlet ILC2 ILC2_2 IL-33
## AAACCCATCGGTCGAC-1 Singlet ILC2 ILC2_3 IL-33_DA
## RNA_snn_res.0.6 seurat_clusters Anno_ILC2 Topic_1
## AAACCCAAGCATCCTA-1 1 1 ILC2_1 0.331038476
## AAACCCAAGGGCAACT-1 0 0 ILC2_0 0.320312192
## AAACCCAAGGTGCCAA-1 0 0 ILC2_0 0.486266747
## AAACCCACAGAAGTGC-1 0 0 ILC2_0 0.388178809
## AAACCCAGTAGGGTAC-1 3 3 ILC2_3 0.000364474
## AAACCCATCGGTCGAC-1 0 0 ILC2_0 0.378962662
## Topic_2 Topic_3 Topic_4 Topic_5 Topic_6
## AAACCCAAGCATCCTA-1 0.1559088 0.0826200026 0.004865470 0.17464599 0.01494368
## AAACCCAAGGGCAACT-1 0.2143718 0.0007200549 0.018321514 0.35710747 0.04640966
## AAACCCAAGGTGCCAA-1 0.2110820 0.0461481542 0.019537277 0.16199396 0.04409882
## AAACCCACAGAAGTGC-1 0.1497532 0.1755347758 0.001025182 0.07544155 0.15852893
## AAACCCAGTAGGGTAC-1 0.1149274 0.0018554194 0.426847626 0.16946351 0.13176387
## AAACCCATCGGTCGAC-1 0.3986307 0.0996796307 0.006114663 0.07988980 0.02645965
## Topic_7 Topic_8
## AAACCCAAGCATCCTA-1 0.199101457 0.036876086
## AAACCCAAGGGCAACT-1 0.039479830 0.003277505
## AAACCCAAGGTGCCAA-1 0.022333614 0.008539477
## AAACCCACAGAAGTGC-1 0.036283259 0.015254290
## AAACCCAGTAGGGTAC-1 0.153345182 0.001432542
## AAACCCATCGGTCGAC-1 0.005545364 0.004717507
tk <- 8
tp <- paste0("Topic_",1:tk)
outlier <- lapply(tp, function(x) which(GEX.seur@meta.data[,x]>(mean(GEX.seur@meta.data[,x], na.rm=TRUE)+
3*sd(GEX.seur@meta.data[,x], na.rm=TRUE))))
names(outlier) <- tp
#
max.nooutlier <- lapply(tp, function(x) if(length(unlist(outlier[x]))){
ceiling(10*max(GEX.seur@meta.data[-unlist(outlier[x]),x], na.rm=TRUE))/10
}else{
ceiling(10*max(GEX.seur@meta.data[,x], na.rm=TRUE))/10
})
names(max.nooutlier) <- tp
GEX.seur_override <- GEX.seur
for(xx in tp){
GEX.seur_override@meta.data[unlist(outlier[xx]),xx] <- rep(unlist(max.nooutlier[xx]))
}
p_umap <- lapply(tp, function(x) plot.umap.feature(GEX.seur_override,
x,
"meta",
pt.size = 0.5,
ncols = 1,
title = "weight",
lower = 0,
upper = unlist(max.nooutlier[x]),
na.color = "gray") + theme(aspect.ratio = 0.75))
## Warning: package 'plotly' was built under R version 4.0.4
##
## Attaching package: 'plotly'
## The following object is masked from 'package:XVector':
##
## slice
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p_umap
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
cowplot::plot_grid(plotlist = p_umap, ncol = 4)
still in umap
p_umap.PBS <- lapply(tp, function(x) plot.umap.feature(subset(GEX.seur_override, subset=SP.info=="PBS"),
x,
"meta",
pt.size = 0.5,
ncols = 1,
title = "weight",
lower = 0,
upper = unlist(max.nooutlier[x]),
na.color = "gray") + theme(aspect.ratio = 0.75))
p_umap.PBS
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
cowplot::plot_grid(plotlist = p_umap.PBS, ncol = 4)
p_umap.IL33 <- lapply(tp, function(x) plot.umap.feature(subset(GEX.seur_override, subset=SP.info=="IL-33"),
x,
"meta",
pt.size = 0.5,
ncols = 1,
title = "weight",
lower = 0,
upper = unlist(max.nooutlier[x]),
na.color = "gray") + theme(aspect.ratio = 0.75))
p_umap.IL33
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
cowplot::plot_grid(plotlist = p_umap.IL33, ncol = 4)
p_umap.IL33_DA <- lapply(tp, function(x) plot.umap.feature(subset(GEX.seur_override, subset=SP.info=="IL-33_DA"),
x,
"meta",
pt.size = 0.5,
ncols = 1,
title = "weight",
lower = 0,
upper = unlist(max.nooutlier[x]),
na.color = "gray") + theme(aspect.ratio = 0.75))
p_umap.IL33_DA
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
cowplot::plot_grid(plotlist = p_umap.IL33_DA, ncol = 4)
theta <- as.data.frame(Topic_clus$theta)
colnames(theta) <- paste0("Topic_", 1:tk)
feat.topic <- ExtractTopFeatures(theta, top_features = 100)
feat.topic_genes <- as.data.frame(sapply(1:tk, function(x) rownames(theta)[feat.topic$indices[x,]]))
colnames(feat.topic_genes) <- paste0("topic_", 1:tk)
feat.topic_scores <- as.data.frame(feat.topic$scores)
rownames(feat.topic_scores) <- paste0("topic_", 1:tk)
plot.feat.topic <- data.frame()
for(j in 1:nrow(feat.topic_scores)){
current.topic <- as.data.frame(t(feat.topic_scores[j,]))
colnames(current.topic) <- "score"
rownames(current.topic) <- feat.topic_genes[,j]
current.topic$genes <- feat.topic_genes[,j]
current.topic.top <- current.topic[1:40,]
current.topic.top$topic <- rep(colnames(feat.topic_genes)[j], nrow(current.topic.top))
current.topic.top$genes <- factor(current.topic.top$genes, levels=rev(current.topic.top$genes))
current.topic.top$order <- paste0(current.topic.top$genes, "_", current.topic.top$topic)
current.topic.top$order <- factor(current.topic.top$order, levels=rev(current.topic.top$order))
plot.feat.topic <- rbind(plot.feat.topic, current.topic.top)
}
plot.feat.topic$topic <- factor(plot.feat.topic$topic, levels=unique(plot.feat.topic$topic))
bar plots
p_bar <- ggplot(data = plot.feat.topic, aes(x = order, y = score)) +
geom_bar(stat = "identity") +
coord_flip() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.text = element_text(size = 5), text = element_text(size = 8),
axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(), strip.background = element_rect(color = "white", fill = "white")) +
facet_wrap(~topic, scales="free", ncol = 4) +
scale_x_discrete(breaks = plot.feat.topic$order, labels = plot.feat.topic$genes)
p_bar
plot expression of top 50 genes
topic.genes <- lapply(tolower(tp), function(x) plot.feat.topic$genes[which(plot.feat.topic$topic==x)])
names(topic.genes) <- tolower(tp)
for(t in 1:tk){
p_genes <- lapply(unlist(topic.genes[t]), function(y) plot.umap.feature(GEX.seur, y, "gene",
pt.size = 0.5,
ncols = 1, title = "expression") + theme(aspect.ratio = 0.75))
# arrange plots on a grid
cowplot::plot_grid(plotlist=p_genes, ncol=4)
# ggsave
ggsave(paste0("topicmodel/K",tk,"/topics_top40_expression_on_UMAP_",tp[t],"_k",tk,"_tol0.1.pdf"),
width = 13, height = 20, units = "in")
}
## code from Dianyu's github
# https://github.com/chendianyu/2021_NI_scGCB
## Connect clustering and topic model
cluster_topic_mtx <- GEX.seur_override@meta.data %>%
#filter(seurat_clusters != 6) %>%
gather(key = "topic", value = "weight", paste0("Topic_", 1:8), factor_key = T) %>%
group_by(Anno_ILC2, topic) %>%
summarise(weight = mean(weight)) %>%
spread(key = topic, value = weight)
cluster_topic_mtx <- column_to_rownames(cluster_topic_mtx, var = "Anno_ILC2")
topic_order <- order(max.col(t(apply(cluster_topic_mtx, 2, scale))))
pheatmap::pheatmap(t(cluster_topic_mtx)[rev(c(8,4,3,7,1,2,5,6)),c(2,1,3,4,5,6)],
cluster_rows = F,
cluster_cols = F,
scale = "row",
main= "Relation of Topics and Clusters")
## code from Dianyu's github
# https://github.com/chendianyu/2021_NI_scGCB
## Connect clustering and topic model
plot_topic_cluster <- function(obj,topic){
obj@meta.data %>%
#filter(seurat_clusters!=6) %>%
ggplot(aes_string("seurat_clusters", paste0("Topic_", topic), color = "seurat_clusters")) +
geom_violin(draw_quantiles=c(0.25,0.75)) +
ggbeeswarm::geom_quasirandom(size = 0.01, width = 0.3, alpha = 0.5) +
stat_summary(fun.y=mean, geom="point", shape=18, size=1, color="black") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA))
}
pvnl <- list()
for(tt in 1:tk){
pvnl[[tt]] <- plot_topic_cluster(GEX.seur_override,tt) + labs(x="")
}
pvnl
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
cowplot::plot_grid(plotlist=pvnl, ncol=4)
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
## Warning in regularize.values(x, y, ties, missing(ties), na.rm = na.rm):
## collapsing to unique 'x' values
#### subset Seurat object for condtions
## PBS
topic.avg.PBS <- sapply(tp, function(x){
mean(GEX.seur_override@meta.data[GEX.seur_override$SP.info=="PBS",x])
})
names(topic.avg.PBS) <- tp
## IL33
topic.avg.IL33 <- sapply(tp, function(x){
mean(GEX.seur_override@meta.data[GEX.seur_override$SP.info=="IL-33",x])
})
names(topic.avg.IL33) <- tp
## IL33_DA
topic.avg.IL33_DA <- sapply(tp, function(x){
mean(GEX.seur_override@meta.data[GEX.seur_override$SP.info=="IL-33_DA",x])
})
names(topic.avg.IL33_DA) <- tp
## combine
topic.avg.all <- rbind(topic.avg.PBS,topic.avg.IL33,topic.avg.IL33_DA)
rownames(topic.avg.all) <- c("PBS","IL33","IL33_DA")
####
topic.avg.PBS
## Topic_1 Topic_2 Topic_3 Topic_4 Topic_5 Topic_6 Topic_7
## 0.26888025 0.18235533 0.12535357 0.11290707 0.09042002 0.11799436 0.06895272
## Topic_8
## 0.03166280
topic.avg.IL33
## Topic_1 Topic_2 Topic_3 Topic_4 Topic_5 Topic_6 Topic_7
## 0.27819400 0.15939374 0.09620652 0.09054758 0.11241967 0.10394656 0.11728610
## Topic_8
## 0.04010939
topic.avg.IL33_DA
## Topic_1 Topic_2 Topic_3 Topic_4 Topic_5 Topic_6 Topic_7
## 0.25980210 0.16610987 0.12375749 0.12401408 0.10625613 0.09756779 0.08052668
## Topic_8
## 0.03996384
topic.avg.all
## Topic_1 Topic_2 Topic_3 Topic_4 Topic_5 Topic_6
## PBS 0.2688802 0.1823553 0.12535357 0.11290707 0.09042002 0.11799436
## IL33 0.2781940 0.1593937 0.09620652 0.09054758 0.11241967 0.10394656
## IL33_DA 0.2598021 0.1661099 0.12375749 0.12401408 0.10625613 0.09756779
## Topic_7 Topic_8
## PBS 0.06895272 0.03166280
## IL33 0.11728610 0.04010939
## IL33_DA 0.08052668 0.03996384
gp.list.box <- list()
gp.list.avg <- list()
gp.list.cum <- list()
for(top in tp){
topic.data <- data.frame(weight = c(GEX.seur_override@meta.data[GEX.seur_override$SP.info=="PBS",top],
GEX.seur_override@meta.data[GEX.seur_override$SP.info=="IL-33",top],
GEX.seur_override@meta.data[GEX.seur_override$SP.info=="IL-33_DA",top]),
condition = c(rep("PBS", length(which(GEX.seur_override$SP.info=="PBS"))),
rep("IL33", length(which(GEX.seur_override$SP.info=="IL-33"))),
rep("IL33_DA", length(which(GEX.seur_override$SP.info=="IL-33_DA")))),
topic = rep(top, nrow(GEX.seur_override@meta.data)))
topic.data$condition <- factor(topic.data$condition, levels = c("PBS","IL33","IL33_DA"))
gp.list.box[[top]] <- ggplot(topic.data, aes(y=weight, x=condition, color=condition)) +
geom_boxplot() + scale_y_continuous(limits=c(0,max.nooutlier[[top]])) +
facet_wrap(~ topic, ncol=1) +
scale_color_manual(values = c("#5A5C5F","#0000C8","#FDB911")) +
theme_bw() + theme(panel.grid=element_blank(),
strip.background = element_rect(color = "white", fill = "white"),
panel.border = element_blank(),
axis.line = element_line(colour = "black"))
avg.data <- as.data.frame(topic.avg.all[,top])
avg.data$topic <- rep(top, nrow(avg.data))
colnames(avg.data) <- c("average", "topic")
gp.list.avg[[top]] <- ggplot(avg.data, aes(x=factor(rownames(avg.data),levels = c("PBS","IL33","IL33_DA")),
y=average,
color=factor(rownames(avg.data),levels = c("PBS","IL33","IL33_DA")))) +
geom_col(fill="white") + xlab('') +
theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.title = element_text(), strip.background = element_rect(color = "white", fill = "white")) +
facet_wrap(~ topic, ncol = 1) +
theme_bw() + theme(panel.grid=element_blank(),
strip.background = element_rect(color = "white", fill = "white"),
panel.border = element_blank(),
axis.line = element_line(colour = "black")) +
scale_color_manual(values = c("#5A5C5F","#0000C8","#FDB911"),name="condition")
gp.list.cum[[top]] <- ggplot(topic.data, aes(x=weight, color=condition)) +
stat_ecdf(geom="step", size = 1) +
scale_color_manual(values = c("#5A5C5F","#0000C8","#FDB911")) +
facet_wrap(~ topic, ncol=1) + labs(y="")+
theme_bw() + theme(panel.grid =element_blank(),
strip.background = element_rect(color = "white", fill = "white"),
panel.border = element_blank(),
axis.line = element_line(colour = "black"))
}
cowplot::plot_grid(plotlist=gp.list.box, ncol=4)
cowplot::plot_grid(plotlist=gp.list.avg, ncol=4)
cowplot::plot_grid(plotlist=gp.list.cum, ncol=4)
between each two conditions
# PBS vs IL33
p.PBS_vs_IL33.wilcox <- lapply(tp, function(x){
wilcox.test(GEX.seur_override@meta.data[GEX.seur_override$SP.info=="PBS",x],
GEX.seur_override@meta.data[GEX.seur_override$SP.info=="IL-33",x])
})
names(p.PBS_vs_IL33.wilcox) <- tp
pmat.PBS_vs_IL33.wilcox <- sapply(tp, function(x) p.PBS_vs_IL33.wilcox[[x]]$p.value)
# PBS vs IL33_DA
p.PBS_vs_IL33DA.wilcox <- lapply(tp, function(x){
wilcox.test(GEX.seur_override@meta.data[GEX.seur_override$SP.info=="PBS",x],
GEX.seur_override@meta.data[GEX.seur_override$SP.info=="IL-33_DA",x])
})
names(p.PBS_vs_IL33DA.wilcox) <- tp
pmat.PBS_vs_IL33DA.wilcox <- sapply(tp, function(x) p.PBS_vs_IL33DA.wilcox[[x]]$p.value)
# IL33 vs IL33_DA
p.IL33DA_vs_IL33.wilcox <- lapply(tp, function(x){
wilcox.test(GEX.seur_override@meta.data[GEX.seur_override$SP.info=="IL-33_DA",x],
GEX.seur_override@meta.data[GEX.seur_override$SP.info=="IL-33",x])
})
names(p.IL33DA_vs_IL33.wilcox) <- tp
pmat.IL33DA_vs_IL33.wilcox <- sapply(tp, function(x) p.IL33DA_vs_IL33.wilcox[[x]]$p.value)
# combine pvalue
pval.conditions.wilcox <- data.frame(PBS_vs_IL33= pmat.PBS_vs_IL33.wilcox ,
PBS_vs_IL33DA= pmat.PBS_vs_IL33DA.wilcox ,
IL33_vs_IL33DA= pmat.IL33DA_vs_IL33.wilcox)
# combine padjust
padj.conditions.wilcox <- data.frame(PBS_vs_IL33= p.adjust(pmat.PBS_vs_IL33.wilcox,method = "BH"),
PBS_vs_IL33DA= p.adjust(pmat.PBS_vs_IL33DA.wilcox,method = "BH"),
IL33_vs_IL33DA= p.adjust(pmat.IL33DA_vs_IL33.wilcox,method = "BH"))
pmerge.conditions.wilcox <- cbind(pval.conditions.wilcox,padj.conditions.wilcox)
colnames(pmerge.conditions.wilcox) <- c(paste0(colnames(pval.conditions.wilcox),".pval"),
paste0(colnames(padj.conditions.wilcox),".padj"))
pval.conditions.wilcox
## PBS_vs_IL33 PBS_vs_IL33DA IL33_vs_IL33DA
## Topic_1 7.006054e-02 4.384706e-02 6.471731e-10
## Topic_2 5.683860e-11 8.000171e-06 8.978170e-04
## Topic_3 6.612976e-24 5.306788e-01 2.711401e-47
## Topic_4 1.582588e-06 6.085626e-01 1.442550e-15
## Topic_5 1.099733e-12 2.310336e-05 6.987039e-05
## Topic_6 3.987903e-06 4.372660e-11 2.116977e-03
## Topic_7 2.729652e-31 5.773879e-02 6.692069e-47
## Topic_8 2.730927e-05 6.214584e-07 3.220758e-01
padj.conditions.wilcox
## PBS_vs_IL33 PBS_vs_IL33DA IL33_vs_IL33DA
## Topic_1 7.006054e-02 7.015530e-02 1.294346e-09
## Topic_2 1.136772e-10 2.133379e-05 1.197089e-03
## Topic_3 2.645191e-23 6.064901e-01 2.169121e-46
## Topic_4 2.532141e-06 6.085626e-01 3.846801e-15
## Topic_5 2.932621e-12 4.620671e-05 1.117926e-04
## Topic_6 5.317204e-06 3.498128e-10 2.419402e-03
## Topic_7 2.183722e-30 7.698505e-02 2.676828e-46
## Topic_8 3.121059e-05 2.485834e-06 3.220758e-01
pmerge.conditions.wilcox
## PBS_vs_IL33.pval PBS_vs_IL33DA.pval IL33_vs_IL33DA.pval
## Topic_1 7.006054e-02 4.384706e-02 6.471731e-10
## Topic_2 5.683860e-11 8.000171e-06 8.978170e-04
## Topic_3 6.612976e-24 5.306788e-01 2.711401e-47
## Topic_4 1.582588e-06 6.085626e-01 1.442550e-15
## Topic_5 1.099733e-12 2.310336e-05 6.987039e-05
## Topic_6 3.987903e-06 4.372660e-11 2.116977e-03
## Topic_7 2.729652e-31 5.773879e-02 6.692069e-47
## Topic_8 2.730927e-05 6.214584e-07 3.220758e-01
## PBS_vs_IL33.padj PBS_vs_IL33DA.padj IL33_vs_IL33DA.padj
## Topic_1 7.006054e-02 7.015530e-02 1.294346e-09
## Topic_2 1.136772e-10 2.133379e-05 1.197089e-03
## Topic_3 2.645191e-23 6.064901e-01 2.169121e-46
## Topic_4 2.532141e-06 6.085626e-01 3.846801e-15
## Topic_5 2.932621e-12 4.620671e-05 1.117926e-04
## Topic_6 5.317204e-06 3.498128e-10 2.419402e-03
## Topic_7 2.183722e-30 7.698505e-02 2.676828e-46
## Topic_8 3.121059e-05 2.485834e-06 3.220758e-01
from Lung ILC2 SS2 data ‘DA vs PBS’ (both have IL33)
DEGs.LungILC2_DAvsPBS <- read.csv("SS2_data/DESeq2_DEGs_20210722.DA_vs_PBS.csv")
rownames(DEGs.LungILC2_DAvsPBS) <- DEGs.LungILC2_DAvsPBS$gene
head(DEGs.LungILC2_DAvsPBS)
## gene baseMean log2FoldChange lfcSE stat pvalue
## AA467197 AA467197 44.95977 -3.829955 0.5962771 -6.423112 1.34e-10
## Bmp2 Bmp2 35.92400 3.504667 0.5794070 6.048713 1.46e-09
## Ccnb2 Ccnb2 74.74773 -2.815492 0.4675964 -6.021200 1.73e-09
## Rasa3 Rasa3 119.95292 2.220907 0.3702769 5.997963 2.00e-09
## Mrps12 Mrps12 49.92089 -2.544398 0.4440691 -5.729734 1.01e-08
## Nr1d2 Nr1d2 46.45192 2.761307 0.4808902 5.742073 9.35e-09
## padj FC
## AA467197 5.75e-07 -14.221036
## Bmp2 3.44e-06 11.350364
## Ccnb2 3.44e-06 -7.039591
## Rasa3 3.44e-06 4.661865
## Mrps12 1.24e-05 -5.833646
## Nr1d2 1.24e-05 6.780100
DEGs.DA_up <- (DEGs.LungILC2_DAvsPBS %>% filter(padj < 0.05 & log2FoldChange > 1))$gene
DEGs.DA_dn <- (DEGs.LungILC2_DAvsPBS %>% filter(padj < 0.05 & log2FoldChange < -1))$gene
using code on
https://github.com/chendianyu/2021_NI_scGCB/blob/main/script/Signature_score.Rmd
which is from Adam Hamber’s
## The code below is from Adam Hamber
## 2D scoring by Itay
get_controls <- function(counts, gene.list, verbose=F, control.genes.per.gene=10)
{
# Itay: "Such scores are inevitably correlated with cell complexity so to avoid
# that I subtract a "control" score which is generated by averaging over a control
# gene set. Control gene sets are chosen to contain 100 times more genes than the
# real gene set (analogous to averaging over 100 control sets of similar size) and
# to have the same distribution of population/bulk - based expression levels as the
# real gene set, such that they are expected to have the same number of "zeros" and
# to eliminate the correlation with complexity."
# ---------------------------------------------------------------------------------
# Going to find control points by finding the closest genes in terms of expression level and % of the time we observe it
if(verbose){cat(sprintf("Finding %s background genes based on similarity to given gene set [%s genes] \n",
control.genes.per.gene*length(gene.list), length(gene.list)))}
cat("Summarizing data \n")
summary = data.frame(gene=row.names(counts), mean.expr = Matrix::rowMeans(counts), fract.zero = Matrix::rowMeans(counts==0), stringsAsFactors = F)
#summary = data.frame(gene=row.names(counts), mean.expr = apply(counts,1,mean), fract.zero = apply(counts==0,1,mean), stringsAsFactors = F)
summary$mean.expr.s = scale(summary$mean.expr)
summary$fract.zero.s = scale(summary$fract.zero)
actual.genes = summary[summary$gene %in% gene.list,]
background.genes = summary[!summary$gene %in% gene.list,]
#find the 10 closest genes to each cell cycle marker gene and add them to the lists of control genes
get_closest_genes <- function(i)
{
background.genes$dist = sqrt((background.genes$mean.expr.s - actual.genes$mean.expr.s[i])^2 +
(background.genes$fract.zero.s - actual.genes$fract.zero.s[i])^2)
ordered = background.genes$gene[order(background.genes$dist)]
ordered = ordered[!ordered %in% controls] # don't take genes that already appear in the list
closest = head(ordered, n=control.genes.per.gene)
return(closest)
}
controls = c();
for (i in 1:length(gene.list)){
#info(sprintf("Finding %s control genes for %s", control.genes.per.gene, gene.list[i]))
closest = get_closest_genes(i)
#info(sprintf("Found %s: ", length(closest)))
controls = unique(c(controls, closest))
}
if(verbose){cat(sprintf("Control gene selection complete. %s genes found. \n", length(controls)))}
#print(controls)
return(controls)
}
## Define calculate function
calculate_signature_score <- function(count_matrix, gene_list){
control_gene <- get_controls(counts = count_matrix,
gene.list = gene_list)
signature_score <- colMeans(count_matrix[gene_list, ], na.rm = TRUE) -
colMeans(count_matrix[control_gene, ], na.rm = TRUE)
return(signature_score)
}
score.DA_up <- calculate_signature_score(as.data.frame(GEX.seur@assays[['RNA']]@data),
DEGs.DA_up)
## Summarizing data
#score.DA_up
score.DA_dn <- calculate_signature_score(as.data.frame(GEX.seur@assays[['RNA']]@data),
DEGs.DA_dn)
## Summarizing data
#score.DA_dn
GEX.seur_score <- GEX.seur
GEX.seur_score <- AddMetaData(GEX.seur_score,
score.DA_up,
"DAup_score")
GEX.seur_score <- AddMetaData(GEX.seur_score,
score.DA_dn,
"DAdn_score")
## violin plot
vln.DA_up <- GEX.seur_score@meta.data %>%
ggplot(aes(SP.info, DAup_score, color = SP.info)) +
geom_violin(draw_quantiles=c(0.25,0.75),trim = FALSE) +
#ylim(c(-0.25,0.35)) +
ggbeeswarm::geom_quasirandom(size = 0.01, width = 0.2, alpha = 0.3) +
stat_summary(fun.y=mean, geom="point", shape=18, size=3, color="black") +
ggpubr::stat_compare_means(aes(lable = ..p.signif..),
method = "wilcox.test",
comparisons = list(c("IL-33","IL-33_DA"),
c("IL-33","PBS"),
c("PBS","IL-33_DA"))) +
scale_color_manual(values = c("#5A5C5F","#0000C8","#FDB911")) +
labs(x="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA))
vln.DA_up + NoLegend()
## violin plot
vln.DA_dn <- GEX.seur_score@meta.data %>%
ggplot(aes(SP.info, DAdn_score, color = SP.info)) +
geom_violin(draw_quantiles=c(0.25,0.75),trim = FALSE) +
#ylim(c(-0.25,0.35)) +
ggbeeswarm::geom_quasirandom(size = 0.01, width = 0.2, alpha = 0.3) +
stat_summary(fun.y=mean, geom="point", shape=18, size=3, color="black") +
ggpubr::stat_compare_means(aes(lable = ..p.signif..),
method = "wilcox.test",
comparisons = list(c("IL-33","IL-33_DA"),
c("IL-33","PBS"),
c("PBS","IL-33_DA"))) +
scale_color_manual(values = c("#5A5C5F","#0000C8","#FDB911")) +
labs(x="") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(fill = NA))
vln.DA_dn + NoLegend()